{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Write a YAML file based on fetching a material document that includes keys not currently accounted for by the API repository."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import yaml\n",
    "from bson.json_util import dumps as bson_dumps\n",
    "from mongogrant import Client\n",
    "\n",
    "client = Client()\n",
    "db = client.db(\"ro:prod/mp_emmet_prod\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "props = list(filter(None, db.materials.distinct(\"has\")))\n",
    "props_remaining = list(props)\n",
    "docs = []\n",
    "while props_remaining:\n",
    "    doc_with_most_props = list(db.materials.aggregate([\n",
    "        {\"$match\": {\"has\": {\"$in\": props_remaining}}},\n",
    "        {\"$project\": {\"task_id\": 1,\n",
    "                      \"has\": 1,\n",
    "                      \"nhas\": {\"$cond\":\n",
    "                               {\"if\": {\"$isArray\": \"$has\" },\n",
    "                                \"then\": {\"$size\": \"$has\"},\n",
    "                                \"else\": 0}}}},\n",
    "        {\"$sort\": {\"nhas\": -1}},\n",
    "        {\"$limit\": 1}\n",
    "    ]))[0]\n",
    "    docs.append(doc_with_most_props)\n",
    "    props_remaining = list(set(props_remaining) - set(doc_with_most_props[\"has\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for mid in [d[\"task_id\"] for d in docs]:\n",
    "    doc = json.loads(bson_dumps(db.materials.find_one({'task_id': mid})))\n",
    "\n",
    "    with open(f\"{mid}.yaml\", \"w\") as f:\n",
    "        yaml.safe_dump(doc, f, default_flow_style=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fetch and store example task documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "\n",
    "task_type_map = defaultdict(list)\n",
    "for doc in db.materials.find({}, [\"blessed_tasks\"]):\n",
    "    for k, v in doc[\"blessed_tasks\"].items():\n",
    "        task_type_map[k].append(v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "docs = []\n",
    "for k, v in task_type_map.items():\n",
    "        tid = v[0]\n",
    "        docs.append(db.tasks.find_one({\"task_id\": tid}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for tid in [d[\"task_id\"] for d in docs]:\n",
    "    doc = json.loads(bson_dumps(db.tasks.find_one({'task_id': tid})))\n",
    "\n",
    "    with open(f\"task_{tid}.yaml\", \"w\") as f:\n",
    "        yaml.safe_dump(doc, f, default_flow_style=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
